package com.fengwk.support.util;

import java.util.List;

import org.apdplat.word.WordSegmenter;
import org.apdplat.word.segmentation.Word;
import org.apdplat.word.util.WordConfTools;

import com.fengwk.support.exception.BaseException;

/**
 * Text segmentation helpers backed by the "word" segmenter library
 * ({@code org.apdplat.word}).
 * Tutorial: https://my.oschina.net/apdplat/blog/228619
 *
 * <p>This is a static-only utility class and cannot be instantiated.
 */
public class WordUtils {

	/** Configuration key under which the stop-word dictionary location is registered. */
	private static final String STOPWORDS_PATH_KEY = "stopwords.path";

	/** Stop-word dictionary location applied by {@link #splitWithStop(String)}. */
	private static final String STOPWORDS_PATH = "classpath:stopwords.txt";

	/**
	 * Always throws, so the class cannot be instantiated even from inside itself.
	 */
	private WordUtils() {
		throw new BaseException("can not instance " + getClass());
	}

	/**
	 * Segments the text and removes stop words from the result.
	 *
	 * <p>Despite the name, "WithStop" here means "with stop-word filtering":
	 * the stop-word dictionary path is registered and
	 * {@link WordSegmenter#seg(String)} is used, which per the library's
	 * documentation filters stop words out of the result.
	 *
	 * @param content text to segment
	 * @return the segmented words as returned by {@link WordSegmenter#seg(String)}
	 */
	public static List<Word> splitWithStop(String content) {
		WordConfTools.set(STOPWORDS_PATH_KEY, STOPWORDS_PATH);
		return WordSegmenter.seg(content);
	}

	/**
	 * Segments the text and keeps stop words in the result.
	 *
	 * <p>Uses the library's dedicated
	 * {@link WordSegmenter#segWithStopWords(String)} entry point rather than
	 * blanking the stop-word path in the segmenter configuration: rewriting that
	 * setting on every call affects every other caller of the segmenter and
	 * races with concurrent {@link #splitWithStop(String)} calls.
	 *
	 * @param content text to segment
	 * @return the segmented words, stop words included
	 */
	public static List<Word> split(String content) {
		return WordSegmenter.segWithStopWords(content);
	}

	/**
	 * Prints the text of each word to standard output, one word per line.
	 *
	 * @param words words to print
	 */
	public static void print(List<Word> words) {
		for (Word word : words) {
			System.out.println(word.getText());
		}
	}

}
